Overview 🔍

This vignette walks you through the nuclei segmentation of DAPI images from your MERFISH experiment. There are numerous tools available for this purpose, but we will be covering a Python-based model called CellPose (Stringer et al. 2021).

  1. Setup the python environment for cell segmentation model🔅:

Type this into your Anaconda prompt/ terminal:

conda create -n scipyenv python=3.9 pandas numpy=1.26.4
conda activate scipyenv
python -m pip install cellpose==3.0.11

where python # this will give the path to the location of the python in a Windows machine

Add the python path to your establishParams() call:

library(masmr)
library(ggplot2)
library(imager)
library(patchwork)
library(RBioFormats)
options(java.parameters = "-Xmx8g")

data_dir <- "~/JIN_SIM/20190718_WK_T2_a_B2_R8_12_d50/"
out_dir <- "~/OUT"

establishParams(
  imageDirs         =  paste0(data_dir, 'S10/'),
  imageFormat       = ".ome.tif",
  codebookFileName  = paste0(data_dir, "codebook/Jin_codebook_B.tsv"),
  outDir            = out_dir,
  dapiDir           = paste0(data_dir, "DAPI_1/"),
  fpkmFileName      = paste0(data_dir, "FPKM_data/Jin_FPKMData_B.tsv"),
  nProbesFileName   = paste0(data_dir, "ProbesPerGene.csv"),
  pythonLocation    = "/home/ubuntu/miniconda3/envs/scipyenv/bin/python" # new
)
## Assuming .ome.tif is extension for meta files...
## Warning in establishParams(imageDirs = paste0(data_dir, "S10/"), imageFormat = ".ome.tif", : codebookColumnNames not provided : will provide placeholders...
## resumeMode = TRUE...
## 100 images across 16 cycles ( 4 ON-bit codes )...
readImageMetaData("dapi_dir") #Switch to DAPI images and prepare meta data
## Warning in readImageMetaData("dapi_dir"): Updating params$fov_names
## Warning in readImageMetaData("dapi_dir"): Unsuccessful attempt at correcting placeholder codebook column names (params$codebook_colnames)
## Warning in readImageMetaData("dapi_dir"): bit_name not matching codebookColumnNames: either edit GLOBALCOORD.csv or params$codebook_colnames (ignore this
## warning if codebook not needed, e.g. DAPI)
establishCleanSlate()
##  [1] ".Random.seed"        "all_markers"         "bit_number"          "cellExp"             "cellIDs_a"           "cleanSlate"          "clusterInfo"        
##  [8] "codebook"            "codebookColumnNames" "cxWindow"            "dapi_files"          "data_dir"            "distanceMetric"      "file"               
## [15] "filtout"             "forExclusion"        "forReg"              "fpkm_mat"            "gcm"                 "gcm_mat"             "gcm_t"              
## [22] "genePalette"         "hnnDistBins"         "i"                   "im"                  "img1"                "img2"                "imList"             
## [29] "imMetricName"        "imMetrics"           "imsub"               "maxClusterSize"      "metric"              "n"                   "nuclei_area"        
## [36] "out_dir"             "p_g"                 "p0"                  "p1"                  "p2"                  "p3"                  "p4"                 
## [43] "params"              "PC_dim"              "percBlank"           "probabilityBLANK"    "q_a1"                "q_a2"                "s10_dirs"           
## [50] "s10_folder"          "series"              "seur_obj"            "seur_obj.markers"    "spotcalldf"          "thresh"              "top5_markers"       
## [57] "troubleshootPlots"
params$current_fov <- params$fov_names[38]

knitr::kable( head( params$global_coords), format = 'html', booktabs = TRUE )
x_microns y_microns z_microns image_file fov bit_name_full bit_name_alias bit_name
-2645.753 3421.847 5678.52 /home/rstudio/JIN_SIM/20190718_WK_T2_a_B2_R8_12_d50/DAPI_1//DAPI_1_MMStack_1-Pos_000_000.ome.tif MMStack_1-Pos_000_000 DAPI_1_dapi 0_dapi DAPI_1_dapi
-2645.753 3609.926 5678.54 /home/rstudio/JIN_SIM/20190718_WK_T2_a_B2_R8_12_d50/DAPI_1//DAPI_1_MMStack_1-Pos_000_001.ome.tif MMStack_1-Pos_000_001 DAPI_1_dapi 0_dapi DAPI_1_dapi
-2645.753 3798.004 5678.56 /home/rstudio/JIN_SIM/20190718_WK_T2_a_B2_R8_12_d50/DAPI_1//DAPI_1_MMStack_1-Pos_000_002.ome.tif MMStack_1-Pos_000_002 DAPI_1_dapi 0_dapi DAPI_1_dapi
-2645.753 3986.082 5678.58 /home/rstudio/JIN_SIM/20190718_WK_T2_a_B2_R8_12_d50/DAPI_1//DAPI_1_MMStack_1-Pos_000_003.ome.tif MMStack_1-Pos_000_003 DAPI_1_dapi 0_dapi DAPI_1_dapi
-2645.753 4174.161 5678.60 /home/rstudio/JIN_SIM/20190718_WK_T2_a_B2_R8_12_d50/DAPI_1//DAPI_1_MMStack_1-Pos_000_004.ome.tif MMStack_1-Pos_000_004 DAPI_1_dapi 0_dapi DAPI_1_dapi
-2645.753 4362.239 5678.38 /home/rstudio/JIN_SIM/20190718_WK_T2_a_B2_R8_12_d50/DAPI_1//DAPI_1_MMStack_1-Pos_000_005.ome.tif MMStack_1-Pos_000_005 DAPI_1_dapi 0_dapi DAPI_1_dapi
  1. Check the format of your DAPI images 📲
params$current_fov <- params$fov_names[38]
im <- readImageList( params$current_fov )[[1]]
## 
## Reading images...
## 1 of 1...
plot(imager::as.cimg(im))
## Warning in as.cimg.array(im): Assuming third dimension corresponds to time/depth

  1. Cell Segmentation using default Cellpose Nuclei model 🔎
establishCellSegModel(cellposeModelType = "nuclei")
## 
## Preparing cell segmentation model(s)...
## Cellpose packaged model...
## Cell segmentation with CELLPOSE...
cellMask <- runCellSegModel(im, masksOnly = TRUE)
## Running CELLPOSE...
df <- as.data.frame(imager::as.cimg(cellMask))
ggplot() +
  geom_raster( data = as.data.frame(imager::as.cimg(im)), 
               aes(x=x, y=y, fill=value) ) +
  scale_fill_gradient(low='black', high='white') +
  geom_point( data=df[df$value>0,], 
              aes(x=x, y=y, colour=factor(value)),
              shape = '.') +
  scale_colour_viridis_d( option='turbo' ) +
  theme_void( base_size = 14 ) +
  scale_y_reverse() +
  coord_fixed() +
  theme(legend.position = 'none')
## Warning in as.cimg.array(im): Assuming third dimension corresponds to time/depth

The default Cellpose segmentation model does not work well for my cortical organoid data. The user is highly encouraged to retrain the default cellpose using its graphical user interface (GUI) (https://github.com/MouseLand/cellpose). It takes a few hours and about 5-7 images for the cellpose cyto segmentation model to start performing at the “eye-level”.

  1. Cell Segmentation using the custom Cellpose Nuclei model 🏆
establishCellSegModel(
  cellposePreTrainedModel =
    '/home/rstudio/CP_model3_newB4_800epoch_lr0pt01_FINAL'
)
## 
## Preparing cell segmentation model(s)...
## User-pretrained Cellpose model...
## Cell segmentation with CELLPOSE...
cellMask <- runCellSegModel(im)
## Running CELLPOSE...
df <- as.data.frame(imager::as.cimg(cellMask))
ggplot() +
  geom_raster( data = as.data.frame(imager::as.cimg(im)), 
               aes(x=x, y=y, fill=value) ) +
  scale_fill_gradient(low='black', high='white') +
  geom_point( data=df[df$value>0,], 
              aes(x=x, y=y, colour=factor(value)),
              shape = '.') +
  scale_colour_viridis_d( option='turbo' ) +
  theme_void( base_size = 14 ) +
  scale_y_reverse() +
  coord_fixed() +
  theme(legend.position = 'none')

5) Run the custom Cellpose model on all DAPI files 🏁

establishCellSegModel(
  cellposePreTrainedModel =
    '/home/rstudio/CP_model3_newB4_800epoch_lr0pt01_FINAL'
)

params$verbose <- F
for( fovName in params$fov_names ){
  params$current_fov <- fovName
  
    ## Report current FOV / iteration 
  message( paste0(
    '\n', fovName, ': ', 
    which(params$fov_names==fovName), ' of ', 
    length(params$fov_names), '...' ) )

  im <- readImageList( params$current_fov )[[1]]
  cellMask <- runCellSegModel(im)
  saveCellSegMasks(cellMask, im = im)
  }

🎉 Congratulations! You have just finished cell segmentation! We are half-way there to generating a gene-cell matrix.

Advanced usage examples covered in the advanced usage handbook: - Reading in more complicated Z-stack and multiple channel structure in DAPI images. - Preprocessing DAPI images for better cell segmentation (e.g. brightening DAPI images). - Dilating nuclei masks with arbitrary distance to capture cytoplasmic spots. - Applying Stardist cell segmentation model.